#' ---
#' title: "Text as Behavior: Afrobarometer"
#' date: "`r Sys.Date()`"
#' output: pdf_document
#' header-includes:
#'  - \usepackage{booktabs}
#'  - \usepackage{longtable}
#'  - \usepackage{array}
#'  - \usepackage{multirow}
#'  - \usepackage{wrapfig}
#'  - \usepackage{float}
#'  - \usepackage{colortbl}
#'  - \usepackage{pdflscape}
#'  - \usepackage{tabu}
#'  - \usepackage{threeparttable}
#'  - \usepackage{threeparttablex}
#'  - \usepackage[normalem]{ulem}
#'  - \usepackage{makecell}
#'  - \usepackage{setspace}\doublespacing
#'  - \usepackage{dcolumn}
#' ---

## ----  spin_code_afro, eval = FALSE, include = FALSE ----
# spin code to output Rmd
# set knit = FALSE to generate Rmd / Rnw but not compile

# rmarkdown::render(input = here::here("text_code", "multilingual_text_analysis.R"), output_format = "pdf_document", clean = TRUE)


## ----  load_packages_afro, echo = FALSE ----
# Run the project's shared setup scripts: package loading, custom ggplot
# themes, and table helper functions.
source(here::here("text_code", "text_packages_rep.R"))
source(here::here("text_code", "custom_ggplot_themes.R"))
source(here::here("text_code", "custom_table_functions_rep.R"))

# Use the project's book theme as the default for subsequent plots.
set_theme(theme_book())

# Convenience wrapper around plot_model(type = "pred") that places the legend
# below the plot.
#
# Arguments:
#   mod    fitted model object, passed to plot_model()
#   vars   term name(s) to plot, passed as `terms`
#   legend passed as `show.legend` (default TRUE)
#   title  plot title (default "")
#
# Returns the plot object produced by plot_model() with the theme tweak added.
pm2 <- function(mod, vars, legend = TRUE, title = "") {
    plot_model(
        mod,
        type = "pred",
        terms = vars,
        show.legend = legend,
        title = title
    ) +
        # Spell the element name out in full: the abbreviated `legend.pos`
        # only worked through partial argument matching, which is fragile
        # across ggplot2 versions.
        theme(legend.position = "bottom")
}


#' --all interviews conducted face to face in native languages and then translated into English, French, or Portuguese
#'
#' --democracy battery made up of 8 questions about whether liberal democracy is important

## ----  load_raw_data_afro, echo = FALSE, eval = FALSE ----
# Load the merged Afrobarometer SPSS file and standardise the column names.
# A read error led to adding encoding = "latin1"; see
# https://github.com/tidyverse/haven/issues/615
afro <- clean_names(
    read_sav(
        here("text_data_raw", "Afrobarometer", "merged_r6_data_2016_36countries2.sav"),
        encoding = "latin1"
    )
)

## ----  recode_afro_variables, eval = FALSE ----
# Recode the "meaning of democracy" items: the q29a understanding code, the
# cleaned open-ended text (q29b-q29d), its length, and the valence of the
# three coded responses (q29e-q29g).

# Map the blank placeholders "", " " and "  " of an open-ended response to "";
# every other value is returned unchanged.
blank_to_empty <- function(txt) {
    ifelse(txt == "  " | txt == "" | txt == " ", "", txt)
}

# Valence of one coded "What, if anything, does "democracy" mean to you?"
# response: 1 for the positive codes (1-12), 0 for the negative codes (13-16)
# and for codes 17-9996 (null / neutral replies), NA for everything else
# (including 0, 9999 and -1).
#
# Value labels:
#   0 = Nothing / Democracy has no meaning
#   Positive replies:
#     1 = Civil liberties / personal freedoms (e.g. freedom of speech,
#         religion, movement, etc.)
#     2 = Government by, for, of the people / popular rule
#     3 = Voting / elections / multiparty competition
#     4 = Peace / unity / power sharing
#     5 = Social / economic development
#     6 = Equality / justice
#     7 = Majority rule
#     8 = Governance / effectiveness / accountability / rule of law
#     9 = National independence / people's self-determination
#    10 = Mutual respect
#    11 = Working together
#    12 = Other positive meanings
#   Negative replies:
#    13 = Conflict / confusion
#    14 = Corruption / abuse of power
#    15 = Social / economic hardship
#    16 = Other negative meanings
#   Null / neutral replies:
#    17 = Civilian politics / government
#    18 = Change of government / leadership / laws
#    19 = Other null / neutral meanings
#   9999 = Don't know / Did not understand the question
#   -1 = Missing
code_dem_valence <- function(code) {
    case_when(
        code >= 1  & code <= 12   ~ 1,
        code >= 13 & code <= 16   ~ 0,
        code >= 17 & code <= 9996 ~ 0,
        TRUE                      ~ NA_real_
    )
}

afro <- afro %>%
    mutate(

        # explanatory factor: labels for the q29a codes, with
        # "Eng-French-Port" as the first (reference) level
        understood_dem = fct_relevel(
            case_when(
                q29a == 1  ~ "Eng-French-Port",
                q29a == 2  ~ "Local language",
                q29a == 3  ~ "Did not understand",
                q29a == -1 ~ "Missing",
                TRUE       ~ NA_character_
            ),
            "Eng-French-Port",
            "Local language",
            "Did not understand"
        ),

        # democracy open-ended responses; blanks become "" (an earlier version
        # set them to NA instead)
        democracy1 = blank_to_empty(q29b),
        democracy2 = blank_to_empty(q29c),
        democracy3 = blank_to_empty(q29d),

        democracy_all = paste(democracy1, democracy2, democracy3, sep = ", ") %>%
            # strip the trailing separator(s) left behind by empty responses
            str_replace_all(", $|, , $", "") %>%
            # PORTUGUESE CLEAN UP
            # remove any character in the Unicode range \u0080 to \u009F:
            # these control characters have no visual representation and are
            # used to control text formatting
            str_remove_all("[\u0080-\u009F]") %>%
            str_replace_all("Ã", "A"),

        # character count of the combined text; this includes the ", "
        # separators between responses (an earlier version summed the
        # per-response counts instead)
        nchar_dem_total = nchar(democracy_all),

        # valence of the first, second and third coded response
        dem_valence1 = code_dem_valence(q29e),
        dem_valence2 = code_dem_valence(q29f),
        dem_valence3 = code_dem_valence(q29g)
    )

# Demographics, civic engagement and the democracy-attitude battery.

# Keep x where lo <= x <= hi, otherwise NA.
keep_in_range <- function(x, lo, hi) {
    ifelse(x >= lo & x <= hi, x, NA)
}

# Two-statement items, original codes:
#   1 = Agree very strongly with Statement 1, 2 = Agree with Statement 1,
#   3 = Agree with Statement 2, 4 = Agree very strongly with Statement 2,
#   5 = Agree with neither, 9 = Don't know, 98 = Refused to answer,
#   -1 = Missing
# Both helpers put "neither" at the midpoint (3) and return NA for all other
# codes.

# Higher score = stronger agreement with Statement 2.
score_statement2_high <- function(x) {
    case_when(
        x == 1 ~ 1,
        x == 2 ~ 2,
        x == 3 ~ 4,
        x == 4 ~ 5,
        x == 5 ~ 3,
        TRUE   ~ NA_real_
    )
}

# Higher score = stronger agreement with Statement 1.
score_statement1_high <- function(x) {
    case_when(
        x == 1 ~ 5,
        x == 2 ~ 4,
        x == 3 ~ 2,
        x == 4 ~ 1,
        x == 5 ~ 3,
        TRUE   ~ NA_real_
    )
}

afro <- afro %>%
    mutate(

        # select(., ...) reads the data frame piped into this mutate(), so the
        # dem_valence* columns had to be created in an earlier step.
        # na.rm = TRUE means missing valences count as 0 in the total.
        dem_valence_tot = rowSums(
            select(., dem_valence1, dem_valence2, dem_valence3),
            na.rm = TRUE
        ),

        # explanatory factors
        gender = thisint,
        # race: codes without a label below become NA
        race = fct_relevel(
            case_when(
                q102 == 1  ~ "Black-African",
                q102 == 2  ~ "White-European",
                q102 == 3  ~ "Colored-Mixed Race",
                q102 == 4  ~ "Arab-North African",
                q102 == 5  ~ "South Asian",
                q102 == 6  ~ "East Asian",
                q102 == -1 ~ "Missing",
                q102 == 95 ~ "Other"
            ),
            "Black-African"
        ),
        # race5: four named groups; every other code (and NA) becomes "Other"
        race5 = fct_relevel(
            case_when(
                q102 == 1 ~ "Black-African",
                q102 == 3 ~ "Colored-Mixed Race",
                q102 == 4 ~ "Arab-North African",
                q102 == 5 ~ "South Asian",
                TRUE      ~ "Other"
            ),
            "Black-African"
        ),
        educ = ifelse(q97 > -1 & q97 < 90, q97, NA),
        # NOTE(review): the < 90 cutoff also sets any respondent aged 90 or
        # older to NA -- confirm this is intended and not carried over from
        # the educ recode above.
        age = ifelse(q1 > -1 & q1 < 90, q1, NA),
        # sum of the five q8 items, only when every item is in 0-4
        income_proxy = ifelse(
            q8a >= 0 & q8a <= 4 &
                q8b >= 0 & q8b <= 4 &
                q8c >= 0 & q8c <= 4 &
                q8d >= 0 & q8d <= 4 &
                q8e >= 0 & q8e <= 4,
            q8a + q8b + q8c + q8d + q8e,
            NA
        ),

        # civic engagement battery: q24 items keep codes 0-3
        # (note that q24f feeds civic1a and q24a-q24e feed civic2a-civic6a)
        civic2a = keep_in_range(q24a, 0, 3),
        civic3a = keep_in_range(q24b, 0, 3),
        civic4a = keep_in_range(q24c, 0, 3),
        civic5a = keep_in_range(q24d, 0, 3),
        civic6a = keep_in_range(q24e, 0, 3),
        civic1a = keep_in_range(q24f, 0, 3),

        # q27 items keep codes 0-4
        civic1b = keep_in_range(q27a, 0, 4),
        civic2b = keep_in_range(q27b, 0, 4),
        civic3b = keep_in_range(q27c, 0, 4),
        civic4b = keep_in_range(q27d, 0, 4),
        civic5b = keep_in_range(q27e, 0, 4),

        # each half is divided by its maximum possible sum (6 * 3 and 5 * 4),
        # so civic ranges from 0 to 2; NA if any item is NA
        civic = ((civic1a + civic2a + civic3a + civic4a + civic5a + civic6a) / 18) +
            ((civic1b + civic2b + civic3b + civic4b + civic5b) / 20),

        # Support for democracy
        # 1=Statement 3: Doesn't matter,
        # 2=Statement 2: Sometimes non-democratic preferable,
        # 3=Statement 1: Democracy preferable,
        # 9=Don't know, 98=Refused to answer, -1=Missing
        dem_support = case_when(
            q30 == 1 ~ 1,
            q30 == 2 ~ 2,
            q30 == 3 ~ 3,
            TRUE     ~ NA_real_
        ),
        # recoded for 5 point scale:
        # non-democratic preferable = 1, doesn't matter = 3, democracy = 5
        dem_support5 = case_when(
            q30 == 1 ~ 3,
            q30 == 2 ~ 1,
            q30 == 3 ~ 5,
            TRUE     ~ NA_real_
        ),

        # democracy importance battery (two-statement items, see helpers)
        dem_efficient  = score_statement2_high(q31),
        dem_elect      = score_statement1_high(q32),
        dem_parties    = score_statement2_high(q33),
        dem_monitor    = score_statement1_high(q34),
        dem_opposition = score_statement1_high(q35),
        dem_media      = score_statement1_high(q36),
        dem_parl       = score_statement1_high(q37),
        dem_obey       = score_statement2_high(q38),
        dem_limit      = score_statement1_high(q39)
    )


# afro <- afro %>%
#     mutate(
#         across(.cols = c(dem_support5, dem_efficient, dem_elect, dem_parties, dem_monitor, dem_opposition, dem_media, dem_parl, dem_obey, dem_limit),
#                   .fns = ~replace_na(., 0), # Replace NAs with 0s
#                   .names = "{.col}_na0")) %>% # Create new columns for these transformations
#     mutate(
#         dem_importance_mean = rowMeans(select(., ends_with("na0")), na.rm = TRUE),
#         dem_importance_small =  #  scale 1-5 with 5 being more pro-democracy
#             rowMeans(select(., dem_support5_na0, dem_elect_na0, dem_parties_na0, dem_obey_na0, dem_limit_na0), na.rm = TRUE),
# 
#         ) %>%
# 
#     select(-ends_with("na0") # remove the temporary columns
# 
#            )


# Democracy-importance indices, views of democracy at home, and the
# corruption battery.

# Keep codes 1-4 as they are; everything else becomes NA.
keep_codes_1to4 <- function(x) {
    case_when(
        x == 1 ~ 1,
        x == 2 ~ 2,
        x == 3 ~ 3,
        x == 4 ~ 4,
        TRUE   ~ NA_real_
    )
}

# Shift codes 0-3 up to 1-4; everything else becomes NA.
shift_codes_0to3 <- function(x) {
    case_when(
        x == 0 ~ 1,
        x == 1 ~ 2,
        x == 2 ~ 3,
        x == 3 ~ 4,
        TRUE   ~ NA_real_
    )
}

afro <- afro %>%
    mutate(
        # select(., ...) reads the data frame piped into this mutate(), so the
        # dem_* items come from the earlier recode step. Each item is scored
        # 1-5 with 5 being more pro-democracy.

        # SUM over all ten items with missing items skipped, so the value
        # depends on how many items were answered (0 when none were)
        dem_importance = rowSums(
            select(
                ., dem_support5, dem_efficient, dem_elect, dem_parties,
                dem_monitor, dem_opposition, dem_media, dem_parl, dem_obey,
                dem_limit
            ),
            na.rm = TRUE
        ),

        # MEAN of the answered items, on the 1-5 scale
        # (NaN when no item was answered)
        dem_importance_mean = rowMeans(
            select(
                ., dem_support5, dem_efficient, dem_elect, dem_parties,
                dem_monitor, dem_opposition, dem_media, dem_parl, dem_obey,
                dem_limit
            ),
            na.rm = TRUE
        ),

        # SUM over a five-item subset, missing items skipped
        dem_importance_small = rowSums(
            select(., dem_support5, dem_elect, dem_parties, dem_obey, dem_limit),
            na.rm = TRUE
        ),

        # democracy in home country
        home_isdem   = keep_codes_1to4(q40),
        home_satisfy = keep_codes_1to4(q41),
        # scale 1-4 with 4 being home country is more of a functioning
        # democracy; NA if either item is NA
        home_dem = (home_isdem + home_satisfy) / 2,

        # corruption battery: q53 codes 0-3 become 1-4
        corrup_pres   = shift_codes_0to3(q53a),
        corrup_parl   = shift_codes_0to3(q53b),
        corrup_govt   = shift_codes_0to3(q53c),
        corrup_local  = shift_codes_0to3(q53d),
        corrup_police = shift_codes_0to3(q53e),
        corrup_tax    = shift_codes_0to3(q53f),
        corrup_judge  = shift_codes_0to3(q53g),
        corrup_tradit = shift_codes_0to3(q53h),
        corrup_relig  = shift_codes_0to3(q53i),
        corrup_exec   = shift_codes_0to3(q53j)
    )

# Corruption totals, support for non-democratic government, and turnout.

# Keep codes 1-5 as they are; everything else becomes NA.
keep_codes_1to5 <- function(x) {
    case_when(
        x == 1 ~ 1,
        x == 2 ~ 2,
        x == 3 ~ 3,
        x == 4 ~ 4,
        x == 5 ~ 5,
        TRUE   ~ NA_real_
    )
}

afro <- afro %>%
    mutate(
        # select(., ...) reads the data frame piped into this mutate(), so the
        # corrup_* items come from the earlier recode step. No na.rm here:
        # both indices are NA as soon as one of the ten items is NA.
        corrup_sum = rowSums(
            select(
                ., corrup_exec, corrup_relig, corrup_tradit, corrup_judge,
                corrup_tax, corrup_police, corrup_local, corrup_govt,
                corrup_parl, corrup_pres
            )
        ),
        corrup_mean = rowMeans(
            select(
                ., corrup_exec, corrup_relig, corrup_tradit, corrup_judge,
                corrup_tax, corrup_police, corrup_local, corrup_govt,
                corrup_parl, corrup_pres
            )
        ),

        # non-democratic government battery
        govt_oneparty = keep_codes_1to5(q28a),
        govt_army     = keep_codes_1to5(q28b),
        govt_dictator = keep_codes_1to5(q28c),
        govt_islamlaw = keep_codes_1to5(q28d_naf),
        # mean of the four items (each coded 1-5); NA if any item is NA
        # NOTE(review): the "_naf" suffix suggests q28d_naf may not be asked
        # everywhere; if so, govt is NA wherever it is missing -- confirm.
        govt = (govt_islamlaw + govt_army + govt_dictator + govt_oneparty) / 4,

        # did R vote in last national election
        # 1 when q21 == 1; every other non-NA code, whatever its meaning,
        # becomes 0
        vote = ifelse(q21 == 1, 1, 0)
    )

# remove_char_afro
# Drop the rows whose combined democracy text contains the byte \xc7
# (originally 14 rows with special characters).
# with R upgrade, this no longer seems necessary (?)
sc <- which(str_detect(afro$democracy_all, "\xc7"))
# Guard against an empty index: afro[-integer(0), ] would select zero rows.
if (length(sc) > 0) {
    afro <- afro[-sc, ]
}


# lang_afro
# Code the language of the response from q103: each vector lists the q103
# codes assigned to that language; codes in none of the lists become NA.
eng <- c(
    1, 4, 5, 7, 16, 20, 140, 260, 262, 263, 265,
    300, 301, 302, 303, 304, 305, 306, 307, 310, 340, 380,
    460, 463, 581, 582, 583, 621, 623,
    702, 703, 704, 705, 708, 709,
    780, 781, 782, 783, 784, 787, 788, 789, 790, 792, 798, 799,
    820, 821, 822, 823, 824, 860, 861,
    900, 930, 931, 932, 1221, 1620
)
french <- c(
    2, 102, 104, 1180, 106, 100, 35, 101, 105, 15, 180, 18, 32, 183, 184,
    420, 421, 501, 514, 517, 518, 660, 661, 665,
    1100, 1101, 1142, 1143, 1144, 1145, 1220, 1222,
    1300, 1302, 1305, 1307, 1312
)
port <- c(3, 220, 221, 540, 541, 542, 543, 1660)

afro <- afro %>%
    mutate(
        lang = case_when(
            q103 %in% eng    ~ "English",
            q103 %in% french ~ "French",
            q103 %in% port   ~ "Portuguese",
            TRUE             ~ NA_character_
        )
    )

## ---- save_processed_afro_data ----

# Write the recoded data frame to the project's output folder.
save(
    afro,
    file = here("text_data_output", "afrobarometer_processed.Rdata")
)

